# Copyright 2019 DeepMind Technologies Ltd. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Exploitability of a policy from IS-MCTS search run at each info state."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from absl import app
from absl import flags

from open_spiel.python import policy
from open_spiel.python.algorithms import exploitability
import pyspiel

FLAGS = flags.FLAGS
# Name of the game that main() loads with pyspiel.load_game.
flags.DEFINE_string("game", "kuhn_poker", "Name of the game")

# Seed shared by the rollout evaluator and every ISMCTSBot built in main().
SEED = 129846127


def construct_is_mcts_policy(game, state, tabular_policy, bot, searched):
  """Fills a tabular policy by querying the bot once per information state.

  Walks the game tree below `state` depth-first. The first time an information
  state is reached, the bot's policy at that state is copied into the matching
  row of `tabular_policy`; later visits to the same information state only
  continue the walk.

  Args:
    game: an OpenSpiel game (not read here, only forwarded to recursive calls),
    state: the OpenSpiel state at which this part of the tree walk starts,
    tabular_policy: the policy.TabularPolicy for this game whose rows are
      overwritten,
    bot: the bot whose get_policy(state) yields (action, probability) pairs,
    searched: a dictionary keyed by the information state strings that were
      already searched (empty to begin); it is updated in place.
  """
  if state.is_terminal():
    return

  if not state.is_chance_node():
    key = state.information_state_string()
    if key not in searched:
      # Mark first so each information state is queried exactly once.
      searched[key] = True
      action_probs = bot.get_policy(state)
      row = tabular_policy.policy_for_key(key)
      for action, prob in action_probs:
        row[action] = prob

  # Chance and decision nodes expand the same way: one child per legal action.
  for action in state.legal_actions():
    child = state.clone()
    child.apply_action(action)
    construct_is_mcts_policy(game, child, tabular_policy, bot, searched)


def main(_):
  """Sweeps IS-MCTS settings and prints the exploitability of each policy.

  For every combination of simulation budget, UCT constant and final policy
  type, a fresh tabular policy is filled in by running the bot at each
  information state, and its exploitability is printed as one table row. The
  smallest exploitability seen is printed at the end.

  Args:
    _: unused; positional arguments handed over by app.run.
  """
  game = pyspiel.load_game(FLAGS.game)
  # A single evaluator instance is shared by every bot created below.
  evaluator = pyspiel.RandomRolloutEvaluator(1, SEED)
  # Start from the game's utility range; any smaller measured value replaces it.
  min_expl = game.max_utility() - game.min_utility()

  # Header and rows share one format string. The first column is 8 wide because
  # a format width is only a minimum: "max_sims" has 8 characters and would
  # overflow a narrower field, shifting the header relative to the data rows.
  row_format = "{:>8} {:>10} {:>50} {:>20}"
  print(row_format.format(
      "max_sims", "uct_c", "final_policy_type", "exploitability"))
  for max_simulations in [10, 100, 1000, 10000]:
    for uct_c in [0.2, 0.5, 1.0, 2.0, 4.0]:  # These values are for Kuhn.
      for final_policy_type in [
          pyspiel.ISMCTSFinalPolicyType.NORMALIZED_VISIT_COUNT,
          pyspiel.ISMCTSFinalPolicyType.MAX_VISIT_COUNT,
          pyspiel.ISMCTSFinalPolicyType.MAX_VALUE
      ]:
        tabular_policy = policy.TabularPolicy(game)
        # NOTE(review): the trailing -1, False, False are presumably
        # max_world_samples, use_observation_string and
        # allow_inconsistent_action_sets -- confirm against the binding.
        bot = pyspiel.ISMCTSBot(SEED, evaluator, uct_c, max_simulations, -1,
                                final_policy_type, False, False)
        # A fresh dictionary so every configuration searches all info states.
        searched = {}
        construct_is_mcts_policy(game, game.new_initial_state(), tabular_policy,
                                 bot, searched)
        expl = exploitability.exploitability(game, tabular_policy)
        print(row_format.format(max_simulations, uct_c,
                                str(final_policy_type), expl))
        min_expl = min(min_expl, expl)
  print("Min expl: {}".format(min_expl))


if __name__ == "__main__":
  # Run main through absl's app.run when this file is executed as a script.
  app.run(main)
